Mapping of Ki67 interactions with the genome and comparison with lamina interactions.
Various analyses of HCT116 cells before and after Ki67 loss using a Ki67-AID degron system.
NA
Set the parameters and list the data.
# Load dependencies - without warnings / messages
library(tidyverse)
library(GenomicRanges)
library(rtracklayer)
library(ggplot2)
library(RColorBrewer)
library(GGally)
library(corrr)
library(caTools)
library(ggrastr)
library(colorspace)
# Prepare output
output_dir <- "ts220503_7_hct116_ki67_aid"
dir.create(output_dir, showWarnings = FALSE)
# Load input
chromosomes <- c(paste0("chr", 1:22), "chrX")
input_dir <- "ts220503_1_data_gathering"
bin_size <- readRDS(file.path(input_dir, "bin_size.rds"))
centromeres <- readRDS(file.path(input_dir, "centromeres.rds"))
colors_set1 <- readRDS(file.path(input_dir, "colors_set1.rds"))
colors_set2 <- readRDS(file.path(input_dir, "colors_set2.rds"))
colors_set3 <- readRDS(file.path(input_dir, "colors_set3.rds"))
tib_padamid_replicates <- readRDS(file.path(input_dir,
"tib_padamid_replicates.rds"))
tib_padamid_combined <- readRDS(file.path(input_dir,
"tib_padamid_combined.rds"))
gr_padamid_replicates <- readRDS(file.path(input_dir,
"gr_padamid_replicates.rds"))
gr_padamid_combined <- readRDS(file.path(input_dir,
"gr_padamid_combined.rds"))
tib_hmm_replicates <- readRDS(file.path(input_dir, "tib_hmm_replicates.rds"))
tib_hmm_combined <- readRDS(file.path(input_dir, "tib_hmm_combined.rds"))
padamid_metadata_replicates <- readRDS(file.path(input_dir,
"padamid_metadata_replicates.rds"))
padamid_metadata_combined <- readRDS(file.path(input_dir,
"padamid_metadata_combined.rds"))
# Prepare seqnames
chrom_sizes <- tibble(seqnames = seqlevels(gr_padamid_combined),
length = seqlengths(gr_padamid_combined)) %>%
arrange(-length)
# Scale pA-DamID scores?
tib_padamid_combined <- tib_padamid_combined %>%
mutate_at(4:ncol(.), function(x) scale(x)[, 1]) %>%
filter(seqnames != "chrY")library(knitr)
opts_chunk$set(fig.width = 10, fig.height = 4, cache = T,
dev=c('png', 'pdf'), fig.path = file.path(output_dir, "figures/"))
pdf.options(useDingbats = FALSE)# From Fede:
# ggpairs custom functions
corColor <- function(data, mapping, color = I("black"), sizeRange = c(1, 3), ...) {
x <- eval_data_col(data, mapping$x)
y <- eval_data_col(data, mapping$y)
r <- cor(x, y)
rt <- format(r, digits = 3)
tt <- as.character(rt)
cex <- max(sizeRange)
# helper function to calculate a useable size
percent_of_range <- function(percent, range) {
percent * diff(range) + min(range, na.rm = TRUE)
}
# plot correlation coefficient
p <- ggally_text(label = tt, mapping = aes(), xP = 0.5, yP = 0.5,
# size = I(percent_of_range(cex * abs(r), sizeRange)),
size = 6,
color = color, ...) +
theme(panel.grid.minor=element_blank(),
panel.grid.major=element_blank())
corColors <- RColorBrewer::brewer.pal(n = 7, name = "RdYlBu")[2:6]
if (r <= boundaries[1]) {
corCol <- corColors[1]
} else if (r <= boundaries[2]) {
corCol <- corColors[2]
} else if (r < boundaries[3]) {
corCol <- corColors[3]
} else if (r < boundaries[4]) {
corCol <- corColors[4]
} else {
corCol <- corColors[5]
}
p <- p +
theme(panel.background = element_rect(fill = corCol))
return(p)
}
customScatter <- function (data, mapping)
{
p <- ggplot(data = data, mapping) +
geom_bin2d(bins = 100) +
geom_smooth(method = "lm", se = T, col = "red") +
scale_fill_gradient(low = "lightgrey", high = "black", name = "Count") +
theme_bw()
p
}
PlotScatter <- function(tib, n1, n2, color_by = NULL, identity = F, smooth = 1) {
# Get tibble
tib <- tib %>%
dplyr::select(seqnames, matches(n1), matches(n2)) %>%
rename_all(~ c("seqnames", "n1", "n2"))
if (smooth > 1) {
tib <- tib %>%
group_by(seqnames) %>%
mutate(n1 = runmean(n1, k = smooth),
n2 = runmean(n2, k = smooth))
}
# Prepare color
if (! is.null(color_by)) {
tib <- tib %>%
add_column(color = color_by) %>%
drop_na()
alpha = 1
limits_color <- quantile(tib$color, c(0.001, 0.999), na.rm = T)
tib$color[tib$color < limits_color[1]] <- limits_color[1]
tib$color[tib$color > limits_color[2]] <- limits_color[2]
} else {
tib <- tib %>% drop_na()
tib$color = "1"
alpha = 0.02
}
# Plot
xlimits <- quantile(tib$n1, c(0.001, 0.999), na.rm = T) * 1.4
ylimits <- quantile(tib$n2, c(0.001, 0.999), na.rm = T) * 1.4
plt <- tib %>%
arrange(sample(1:nrow(.), size = nrow(.), replace = F)) %>%
ggplot(aes(x = n1, y = n2, color = color)) +
geom_point(size = 0.5, alpha = alpha) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
xlab(n1) +
ylab(n2) +
ggtitle(paste0("Spearman: ",
round(cor(tib$n1, tib$n2, use = "complete.obs",
method = "spearman"), 2))) +
coord_cartesian(xlim = xlimits, ylim = ylimits) +
theme_bw() +
theme(aspect.ratio = 1)
# Prepare color
if (! is.null(color_by)) {
plt <- plt +
scale_color_gradient2(low = "blue", mid = "grey", high = "red",
midpoint = 0)
} else {
plt <- plt +
scale_color_manual(values = "black", guide = F)
}
if (identity) plt <- plt + geom_abline(slope = 1, intercept = 0, col = "red", linetype = "dashed")
plot(plt)
}
PlotDataTracksLight <- function(input_tib, exp_names, centromeres,
color_groups, plot_chr = "chr1",
plot_start = 1, plot_end = 500e6,
smooth = 1, color_list = NULL,
fix_yaxis = F, aspect_ratio = NULL,
lighten_negative = T, raster = T,
ylimits = NULL) {
# Get the scores for the samples
tib_plot <- input_tib %>%
dplyr::select(seqnames, start, end,
all_of(exp_names))
if (smooth > 1) {
tib_plot <- tib_plot %>%
mutate_at(vars(contains("_")),
runmean, k = smooth)
}
# Filter for plotting window
tib_plot <- tib_plot %>%
filter(seqnames == plot_chr,
start >= plot_start,
end <= plot_end)
# Gather
tib_gather <- tib_plot %>%
gather(key, value, -seqnames, -start, -end) %>%
mutate(key = factor(key, levels = exp_names),
fill_column = color_groups[match(key, names(color_groups))],
fill_column = factor(fill_column, levels = unique(color_groups)))
# If desired, make negative values a lighter shade to improve visualization
if (lighten_negative) {
tib_gather <- tib_gather %>%
mutate(fill_column = interaction(fill_column,
value < 0))
}
# Centromeres
centromeres.plt <- as_tibble(centromeres) %>%
filter(seqnames == plot_chr) %>%
gather(key_centromeres, value, start, end)
# Plot
if (is.null(ylimits)) {
ylimits <- quantile(tib_gather$value, c(0.001, 0.999), na.rm = T)
}
fill_column <- NULL
plt <- tib_gather %>%
ggplot(aes(fill = fill_column))
# Set all counts tracks to the same limits
if (raster) {
plt <- plt +
rasterize(geom_rect(aes(xmin = start / 1e6, xmax = end / 1e6,
ymin = 0, ymax = value)),
dpi = 300)
} else {
plt <- plt +
geom_rect(aes(xmin = start / 1e6, xmax = end / 1e6,
ymin = 0, ymax = value))
}
plt <- plt +
geom_hline(yintercept = 0, size = 0.5) +
geom_line(data = centromeres.plt,
aes(x = value / 1e6, y = 0),
color = "black", size = 2) +
facet_grid(key ~ ., scales = "free_y") +
xlab(paste0(plot_chr, " (Mb)")) +
ylab("Score") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0.025, 0.025)) +
theme_classic()
if (! is.null(color_list)) {
if (lighten_negative) {
color_list <- c(color_list,
lighten(color_list, amount = 0.5))
}
colors <- levels(tib_gather$fill_column)
color_list <- color_list[1:length(colors)]
names(color_list) <- colors
#colors <- recode(colors, !!!color_list)
plt <- plt +
scale_fill_manual(values = color_list, guide = "none")
} else {
plt <- plt +
scale_fill_brewer(palette = "Set1", guide = "none")
}
if (fix_yaxis) {
plt <- plt +
coord_cartesian(xlim = c(max(c(plot_start,
min(tib_gather$start))) / 1e6,
min(c(plot_end,
max(tib_gather$end))) / 1e6),
ylim = ylimits)
} else {
plt <- plt +
coord_cartesian(xlim = c(max(c(plot_start,
min(tib_gather$start))) / 1e6,
min(c(plot_end,
max(tib_gather$end))) / 1e6))
}
if (! is.null(aspect_ratio)) {
plt <- plt +
theme(aspect.ratio = aspect_ratio)
}
plot(plt)
}
# Similar data tracks as the function above, but with the functionality to plot
# matching data tracks on top of each other
PlotDataTracksLightFaceted <- function(input_tib, exp_names, centromeres,
color_groups, facet_groups,
plot_chr = "chr1", size = 0.75,
plot_start = 1, plot_end = 500e6,
smooth = 1, color_list = NULL,
fix_yaxis = F, scale_tracks = F,
aspect_ratio = NULL, raster = T, ylimits = NULL) {
# # Exp names is a vector of sample names
# exp_search <- paste(exp_names, collapse = "|")
# Get the scores for the samples
tib_plot <- input_tib %>%
dplyr::select(seqnames, start, end,
all_of(exp_names))
if (smooth > 1) {
tib_plot <- tib_plot %>%
mutate_at(vars(contains("_")),
runmean, k = smooth)
}
if (scale_tracks) {
tib_plot <- tib_plot %>%
mutate_at(vars(contains("_")),
function(x) scale(x)[, 1])
}
# Filter for plotting window
tib_plot <- tib_plot %>%
filter(seqnames == plot_chr,
start >= plot_start,
end <= plot_end)
# Gather
tib_gather <- tib_plot %>%
gather(key, value, -seqnames, -start, -end) %>%
mutate(key = factor(key, levels = exp_names),
fill_column = color_groups[match(key, names(color_groups))],
fill_column = factor(fill_column, levels = unique(color_groups)),
facet_column = facet_groups[match(key, names(facet_groups))],
facet_column = factor(facet_column, levels = unique(facet_groups)))
# Centromeres
centromeres.plt <- as_tibble(centromeres) %>%
filter(seqnames == plot_chr) %>%
gather(key_centromeres, value, start, end)
# Plot
if (is.null(ylimits)) {
ylimits <- quantile(tib_gather$value, c(0.001, 0.999), na.rm = T)
}
fill_column <- NULL
plt <- tib_gather %>%
ggplot(aes(fill = fill_column))
# Set all counts tracks to the same limits
plt <- plt +
#geom_rect(aes(xmin = start / 1e6, xmax = end / 1e6,
# ymin = 0, ymax = value)) +
geom_hline(yintercept = 0, size = 0.5) +
geom_line(data = centromeres.plt,
aes(x = value / 1e6, y = 0),
color = "black", size = 2) +
facet_grid(facet_column ~ ., scales = "free_y") +
xlab(paste0(plot_chr, " (Mb)")) +
ylab("Score") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0.025, 0.025)) +
theme_classic()
if (raster) {
plt <- plt +
rasterize(geom_line(aes(x = (start+end)/2e6, y = value,
col = fill_column), size = size),
dpi = 300)
} else {
plt <- plt +
geom_line(aes(x = (start+end)/2e6, y = value,
col = fill_column), size = size)
}
if (! is.null(color_list)) {
colors <- levels(tib_gather$fill_column)
color_list <- color_list[1:length(colors)]
names(color_list) <- colors
#colors <- recode(colors, !!!color_list)
plt <- plt +
scale_fill_manual(values = color_list, guide = "none") +
scale_color_manual(values = color_list, guide = "none")
} else {
plt <- plt +
scale_fill_brewer(palette = "Set1", guide = "none") +
scale_color_brewer(palette = "Set1", guide = "none")
}
if (fix_yaxis) {
plt <- plt +
coord_cartesian(xlim = c(max(c(plot_start,
min(tib_gather$start))) / 1e6,
min(c(plot_end,
max(tib_gather$end))) / 1e6),
ylim = ylimits)
} else {
plt <- plt +
coord_cartesian(xlim = c(max(c(plot_start,
min(tib_gather$start))) / 1e6,
min(c(plot_end,
max(tib_gather$end))) / 1e6))
}
if (! is.null(aspect_ratio)) {
plt <- plt +
theme(aspect.ratio = aspect_ratio)
}
plot(plt)
}
PlotScatterBinned <- function(tib, n1, n2, color_by = NULL, identity = F,
n_min = 10, ylimits_col = c(-2.4, 2.4),
count_range = c(0, 400), smooth = 1,
remove_outliers = F) {
# Get tibble
tib_process <- tib %>%
dplyr::select(seqnames, all_of(n1), all_of(n2)) %>%
rename_all(~ c("seqnames", "n1", "n2"))
# Calculate pearson correlation without any smoothing
tib_cor <- round(cor(tib_process$n1, tib_process$n2, use = "complete.obs",
method = "pearson"), 2)
if (smooth > 1) {
tib_process <- tib_process %>%
group_by(seqnames) %>%
mutate(n1 = runmean(n1, k = smooth),
n2 = runmean(n2, k = smooth))
}
if (! is.null(color_by)) {
tib_process <- tib_process %>%
add_column(color = color_by)
}
tib_process <- tib_process %>%
drop_na()
# Change color range
if (! is.null(color_by)) {
limits_color <- quantile(tib_process$color, c(0.001, 0.999), na.rm = T)
tib_process$color[tib_process$color < limits_color[1]] <- limits_color[1]
tib_process$color[tib_process$color > limits_color[2]] <- limits_color[2]
}
# Replace outliers
if (remove_outliers) {
tib_process <- tib_process %>%
mutate(n1 = case_when(n1 > quantile(n1, 0.999) ~ quantile(n1, 0.999),
n1 < quantile(n1, 0.001) ~ quantile(n1, 0.001),
T ~ n1),
n2 = case_when(n2 > quantile(n2, 0.999) ~ quantile(n2, 0.999),
n2 < quantile(n2, 0.001) ~ quantile(n2, 0.001),
T ~ n2))
}
# Metrics
n1_min = min(tib_process$n1) - 0.001
n1_max = max(tib_process$n1) + 0.001
n1_binsize <- (n1_max - n1_min) / 49
n2_min = min(tib_process$n2) - 0.001
n2_max = max(tib_process$n2) + 0.001
n2_binsize <- (n2_max - n2_min) / 49
tib_summary <- tib_process %>%
mutate(n1_cut = cut(n1,
seq(n1_min, n1_max, length.out = 50)),
n2_cut = cut(n2,
seq(n2_min, n2_max, length.out = 50))) %>%
mutate(n1_bin = as.numeric(as.factor(n1_cut)),
n2_bin = as.numeric(as.factor(n2_cut))) %>%
mutate(n1_bin = n1_min - n1_binsize/2 + n1_bin * n1_binsize,
n2_bin = n2_min - n2_binsize/2 + n2_bin * n2_binsize) %>%
group_by(n1_bin, n2_bin)
# Plot
if (! is.null(color_by)) {
tib_summary <- tib_summary %>%
dplyr::summarise(n = n(),
mark = mean(color)) %>%
ungroup() %>%
filter(n >= n_min)
plt <- tib_summary %>%
ggplot(aes(x = n1_bin, y = n2_bin)) +
geom_tile(aes(fill = mark)) +
scale_fill_gradient2(low = "blue", mid = "grey", high = "red",
midpoint = 0, limits = ylimits_col,
na.value = "green")
} else {
tib_summary <- tib_summary %>%
dplyr::summarise(n = n()) %>%
ungroup() %>%
filter(n >= n_min)
plt <- tib_summary %>%
ggplot(aes(x = n1_bin, y = n2_bin)) +
geom_tile(aes(fill = n)) +
scale_fill_gradient(low = "lightgrey", high = "black", name = "Count",
limits = count_range, na.value = "green")
}
plt <- plt +
geom_hline(yintercept = 0, linetype = "dashed", col = "black") +
geom_vline(xintercept = 0, linetype = "dashed", col = "black") +
xlab(n1) +
ylab(n2) +
ggtitle(paste0("Pearson: ", tib_cor)) +
theme_bw() +
theme(aspect.ratio = 1)
if (identity) plt <- plt + geom_abline(slope = 1, intercept = 0,
col = "black", linetype = "dashed")
plot(plt)
}Let’s start with making some scatter plots.
# Get metadata
padamid_metadata_combined_aid <- padamid_metadata_combined %>%
filter(experiment == "Ki67_aid") %>%
filter(! grepl("control", condition),
! grepl("thymidine", condition))
# Get tibble
tib <- tib_padamid_combined %>%
mutate(# RO vs double-thy
RO_vs_doublethy_Ki67 = HCT116_AID_RO_Ki67 - HCT116_AID_doublethy_Ki67,
RO_vs_doublethy_LMNB1 = HCT116_AID_RO_LMNB1 - HCT116_AID_doublethy_LMNB1,
RO_vs_doublethy_H3K27me3 = HCT116_AID_RO_H3K27me3 - HCT116_AID_doublethy_H3K27me3,
RO_vs_doublethy_H3K9me3 = HCT116_AID_RO_H3K9me3 - HCT116_AID_doublethy_H3K9me3,
# IAA vs nonIAA, double-thy
doublethy_LMNB1_diff = HCT116_AID_doublethy_IAA_LMNB1 - HCT116_AID_doublethy_LMNB1,
doublethy_H3K27me3_diff = HCT116_AID_doublethy_IAA_H3K27me3 - HCT116_AID_doublethy_H3K27me3,
doublethy_H3K9me3_diff = HCT116_AID_doublethy_IAA_H3K9me3 - HCT116_AID_doublethy_H3K9me3,
# IAA vs nonIAA, RO
RO_LMNB1_diff = HCT116_AID_RO_IAA_LMNB1 - HCT116_AID_RO_LMNB1,
RO_H3K27me3_diff = HCT116_AID_RO_IAA_H3K27me3 - HCT116_AID_RO_H3K27me3,
RO_H3K9me3_diff = HCT116_AID_RO_IAA_H3K9me3 - HCT116_AID_RO_H3K9me3)
# Smoothing of differences?
smooth_differences = F
if (smooth_differences) {
tib <- tib %>%
group_by(seqnames) %>%
mutate_at(vars(contains("vs"), contains("diff")),
function(x) runmean(x, k = 3)) %>%
ungroup()
}
# Simple correlations
# 1) RO vs doublethy
PlotScatterBinned(tib, "HCT116_AID_doublethy_Ki67", "HCT116_AID_RO_Ki67",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, "HCT116_AID_doublethy_LMNB1", "HCT116_AID_RO_LMNB1",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, "HCT116_AID_doublethy_H3K27me3", "HCT116_AID_RO_H3K27me3",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, "HCT116_AID_doublethy_H3K9me3", "HCT116_AID_RO_H3K9me3",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
# 2) Effect of IAA
PlotScatterBinned(tib, "HCT116_AID_doublethy_LMNB1", "HCT116_AID_doublethy_IAA_LMNB1",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, "HCT116_AID_doublethy_H3K27me3", "HCT116_AID_doublethy_IAA_H3K27me3",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, "HCT116_AID_doublethy_H3K9me3", "HCT116_AID_doublethy_IAA_H3K9me3",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, "HCT116_AID_RO_LMNB1", "HCT116_AID_RO_IAA_LMNB1",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, "HCT116_AID_RO_H3K27me3", "HCT116_AID_RO_IAA_H3K27me3",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, "HCT116_AID_RO_H3K9me3", "HCT116_AID_RO_IAA_H3K9me3",
identity = T, count_range = c(0, 800))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
# Simple data tracks - for double-thy & RO
PlotDataTracksLight(input_tib = tib,
exp_names = c("HCT116_AID_doublethy_Ki67",
"HCT116_AID_doublethy_LMNB1",
"HCT116_AID_doublethy_IAA_LMNB1",
"HCT116_AID_doublethy_H3K27me3",
"HCT116_AID_doublethy_IAA_H3K27me3",
"HCT116_AID_doublethy_H3K9me3",
"HCT116_AID_doublethy_IAA_H3K9me3"),
centromeres,
color_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
HCT116_AID_doublethy_LMNB1 = "LMNB1",
HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
HCT116_AID_doublethy_H3K27me3 = "H3K27me3",
HCT116_AID_doublethy_IAA_H3K27me3 = "H3K27me3",
HCT116_AID_doublethy_H3K9me3 = "H3K9me3",
HCT116_AID_doublethy_IAA_H3K9me3 = "H3K9me3"),
smooth = 9, plot_chr = "chr11", fix_yaxis = T,
plot_start = 25e6, plot_end = 100e6,
color_list = colors_set1[c(1:4)])## Warning: Removed 441 rows containing missing values (geom_rect).
PlotDataTracksLight(input_tib = tib,
exp_names = c("HCT116_AID_RO_Ki67",
"HCT116_AID_RO_LMNB1",
"HCT116_AID_RO_IAA_LMNB1",
"HCT116_AID_RO_H3K27me3",
"HCT116_AID_RO_IAA_H3K27me3",
"HCT116_AID_RO_H3K9me3",
"HCT116_AID_RO_IAA_H3K9me3"),
centromeres,
color_groups = c(HCT116_AID_RO_Ki67 = "Ki67",
HCT116_AID_RO_LMNB1 = "LMNB1",
HCT116_AID_RO_IAA_LMNB1 = "LMNB1",
HCT116_AID_RO_H3K27me3 = "H3K27me3",
HCT116_AID_RO_IAA_H3K27me3 = "H3K27me3",
HCT116_AID_RO_H3K9me3 = "H3K9me3",
HCT116_AID_RO_IAA_H3K9me3 = "H3K9me3"),
smooth = 9, plot_chr = "chr11", fix_yaxis = T,
plot_start = 25e6, plot_end = 100e6,
color_list = colors_set1[c(1:4)])## Warning: Removed 395 rows containing missing values (geom_rect).
# Faceted plots
PlotDataTracksLightFaceted(input_tib = tib,
exp_names = c("HCT116_AID_doublethy_Ki67",
"HCT116_AID_doublethy_LMNB1",
"HCT116_AID_doublethy_IAA_LMNB1",
"HCT116_AID_doublethy_H3K27me3",
"HCT116_AID_doublethy_IAA_H3K27me3",
"HCT116_AID_doublethy_H3K9me3",
"HCT116_AID_doublethy_IAA_H3K9me3"),
centromeres,
color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
HCT116_AID_doublethy_LMNB1 = "minusIAA",
HCT116_AID_doublethy_IAA_LMNB1 = "plusIAA",
HCT116_AID_doublethy_H3K27me3 = "minusIAA",
HCT116_AID_doublethy_IAA_H3K27me3 = "plusIAA",
HCT116_AID_doublethy_H3K9me3 = "minusIAA",
HCT116_AID_doublethy_IAA_H3K9me3 = "plusIAA"),
smooth = 9, plot_chr = "chr3", fix_yaxis = F,
plot_start = 40e6, plot_end = 91e6,
color_list = c("grey20", "red"),
facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
HCT116_AID_doublethy_LMNB1 = "LMNB1",
HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
HCT116_AID_doublethy_H3K27me3 = "H3K27me3",
HCT116_AID_doublethy_IAA_H3K27me3 = "H3K27me3",
HCT116_AID_doublethy_H3K9me3 = "H3K9me3",
HCT116_AID_doublethy_IAA_H3K9me3 = "H3K9me3"))## Warning: Removed 5 row(s) containing missing values (geom_path).
PlotDataTracksLightFaceted(input_tib = tib,
exp_names = c("HCT116_AID_RO_Ki67",
"HCT116_AID_RO_LMNB1",
"HCT116_AID_RO_IAA_LMNB1",
"HCT116_AID_RO_H3K27me3",
"HCT116_AID_RO_IAA_H3K27me3",
"HCT116_AID_RO_H3K9me3",
"HCT116_AID_RO_IAA_H3K9me3"),
centromeres,
color_groups = c(HCT116_AID_RO_Ki67 = "minusIAA",
HCT116_AID_RO_LMNB1 = "minusIAA",
HCT116_AID_RO_IAA_LMNB1 = "plusIAA",
HCT116_AID_RO_H3K27me3 = "minusIAA",
HCT116_AID_RO_IAA_H3K27me3 = "plusIAA",
HCT116_AID_RO_H3K9me3 = "minusIAA",
HCT116_AID_RO_IAA_H3K9me3 = "plusIAA"),
smooth = 9, plot_chr = "chr3", fix_yaxis = F,
plot_start = 40e6, plot_end = 91e6,
color_list = c("grey20", "red"),
facet_groups = c(HCT116_AID_RO_Ki67 = "Ki67",
HCT116_AID_RO_LMNB1 = "LMNB1",
HCT116_AID_RO_IAA_LMNB1 = "LMNB1",
HCT116_AID_RO_H3K27me3 = "H3K27me3",
HCT116_AID_RO_IAA_H3K27me3 = "H3K27me3",
HCT116_AID_RO_H3K9me3 = "H3K9me3",
HCT116_AID_RO_IAA_H3K9me3 = "H3K9me3"))## Warning: Removed 5 row(s) containing missing values (geom_path).
# Only the difference
PlotDataTracksLightFaceted(input_tib = tib,
exp_names = c("doublethy_LMNB1_diff",
"doublethy_H3K27me3_diff",
"doublethy_H3K9me3_diff"),
centromeres,
color_groups = c(doublethy_LMNB1_diff = "LMNB1",
doublethy_H3K27me3_diff = "H3K27me3",
doublethy_H3K9me3_diff = "H3K9me3"),
smooth = 51, plot_chr = "chr3",
plot_start = 40e6, plot_end = 91e6,
color_list = colors_set1[c(2:4)],
facet_groups = c(doublethy_LMNB1_diff = "diff",
doublethy_H3K27me3_diff = "diff",
doublethy_H3K9me3_diff = "diff"))PlotDataTracksLightFaceted(input_tib = tib,
exp_names = c("RO_LMNB1_diff",
"RO_H3K27me3_diff",
"RO_H3K9me3_diff"),
centromeres,
color_groups = c(RO_LMNB1_diff = "LMNB1",
RO_H3K27me3_diff = "H3K27me3",
RO_H3K9me3_diff = "H3K9me3"),
smooth = 51, plot_chr = "chr3",
plot_start = 40e6, plot_end = 91e6,
color_list = colors_set1[c(2:4)],
facet_groups = c(RO_LMNB1_diff = "diff",
RO_H3K27me3_diff = "diff",
RO_H3K9me3_diff = "diff"))# Other site
PlotDataTracksLightFaceted(input_tib = tib,
exp_names = c("HCT116_AID_doublethy_Ki67",
"HCT116_AID_doublethy_LMNB1",
"HCT116_AID_doublethy_IAA_LMNB1",
"HCT116_AID_doublethy_H3K27me3",
"HCT116_AID_doublethy_IAA_H3K27me3",
"HCT116_AID_doublethy_H3K9me3",
"HCT116_AID_doublethy_IAA_H3K9me3"),
centromeres,
color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
HCT116_AID_doublethy_LMNB1 = "minusIAA",
HCT116_AID_doublethy_IAA_LMNB1 = "plusIAA",
HCT116_AID_doublethy_H3K27me3 = "minusIAA",
HCT116_AID_doublethy_IAA_H3K27me3 = "plusIAA",
HCT116_AID_doublethy_H3K9me3 = "minusIAA",
HCT116_AID_doublethy_IAA_H3K9me3 = "plusIAA"),
smooth = 9, plot_chr = "chr15", fix_yaxis = F,
plot_start = 18e6, plot_end = 80e6,
color_list = c("grey20", "red"),
facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
HCT116_AID_doublethy_LMNB1 = "LMNB1",
HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
HCT116_AID_doublethy_H3K27me3 = "H3K27me3",
HCT116_AID_doublethy_IAA_H3K27me3 = "H3K27me3",
HCT116_AID_doublethy_H3K9me3 = "H3K9me3",
HCT116_AID_doublethy_IAA_H3K9me3 = "H3K9me3"))## Warning: Removed 64 row(s) containing missing values (geom_path).
PlotDataTracksLightFaceted(input_tib = tib,
exp_names = c("HCT116_AID_RO_Ki67",
"HCT116_AID_RO_LMNB1",
"HCT116_AID_RO_IAA_LMNB1",
"HCT116_AID_RO_H3K27me3",
"HCT116_AID_RO_IAA_H3K27me3",
"HCT116_AID_RO_H3K9me3",
"HCT116_AID_RO_IAA_H3K9me3"),
centromeres,
color_groups = c(HCT116_AID_RO_Ki67 = "minusIAA",
HCT116_AID_RO_LMNB1 = "minusIAA",
HCT116_AID_RO_IAA_LMNB1 = "plusIAA",
HCT116_AID_RO_H3K27me3 = "minusIAA",
HCT116_AID_RO_IAA_H3K27me3 = "plusIAA",
HCT116_AID_RO_H3K9me3 = "minusIAA",
HCT116_AID_RO_IAA_H3K9me3 = "plusIAA"),
smooth = 9, plot_chr = "chr15", fix_yaxis = F,
plot_start = 18e6, plot_end = 80e6,
color_list = c("grey20", "red"),
facet_groups = c(HCT116_AID_RO_Ki67 = "Ki67",
HCT116_AID_RO_LMNB1 = "LMNB1",
HCT116_AID_RO_IAA_LMNB1 = "LMNB1",
HCT116_AID_RO_H3K27me3 = "H3K27me3",
HCT116_AID_RO_IAA_H3K27me3 = "H3K27me3",
HCT116_AID_RO_H3K9me3 = "H3K9me3",
HCT116_AID_RO_IAA_H3K9me3 = "H3K9me3"))## Warning: Removed 32 row(s) containing missing values (geom_path).
PlotDataTracksLightFaceted(input_tib = tib %>%
mutate(delta_lmnb1 = HCT116_AID_doublethy_IAA_LMNB1 -
HCT116_AID_doublethy_LMNB1),
exp_names = c("HCT116_AID_doublethy_Ki67",
"HCT116_AID_doublethy_LMNB1",
"HCT116_AID_doublethy_IAA_LMNB1",
"delta_lmnb1"),
centromeres,
color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
HCT116_AID_doublethy_LMNB1 = "minusIAA",
HCT116_AID_doublethy_IAA_LMNB1 = "plusIAA",
delta_lmnb1 = "delta"),
smooth = 51, plot_chr = "chr3", fix_yaxis = F,
#plot_start = 40e6, plot_end = 91e6,
color_list = c("grey20", "red", "grey50"),
facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
HCT116_AID_doublethy_LMNB1 = "LMNB1",
HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
delta_lmnb1 = "delta_LMNB1"))PlotDataTracksLightFaceted(input_tib = tib %>%
mutate(delta_lmnb1 = HCT116_AID_RO_IAA_LMNB1 -
HCT116_AID_RO_LMNB1),
exp_names = c("HCT116_AID_RO_Ki67",
"HCT116_AID_RO_LMNB1",
"HCT116_AID_RO_IAA_LMNB1",
"delta_lmnb1"),
centromeres,
color_groups = c(HCT116_AID_RO_Ki67 = "minusIAA",
HCT116_AID_RO_LMNB1 = "minusIAA",
HCT116_AID_RO_IAA_LMNB1 = "plusIAA",
delta_lmnb1 = "delta"),
smooth = 51, plot_chr = "chr3", fix_yaxis = F,
#plot_start = 40e6, plot_end = 91e6,
color_list = c("grey20", "red", "grey50"),
facet_groups = c(HCT116_AID_RO_Ki67 = "Ki67",
HCT116_AID_RO_LMNB1 = "LMNB1",
HCT116_AID_RO_IAA_LMNB1 = "LMNB1",
delta_lmnb1 = "delta_LMNB1"))# Original data
PlotDataTracksLight(input_tib = tib,
exp_names = c("HCT116_AID_doublethy_Ki67",
"HCT116_AID_doublethy_LMNB1",
"HCT116_AID_doublethy_IAA_LMNB1"),
centromeres,
color_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
HCT116_AID_doublethy_LMNB1 = "LMNB1",
HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1"),
smooth = 9, plot_chr = "chr3", fix_yaxis = T,
#plot_start = 25e6, plot_end = 100e6,
color_list = colors_set1[c(1:2)])## Warning: Removed 116 rows containing missing values (geom_rect).
PlotDataTracksLight(input_tib = tib,
exp_names = c("HCT116_AID_RO_Ki67",
"HCT116_AID_RO_LMNB1",
"HCT116_AID_RO_IAA_LMNB1"),
centromeres,
color_groups = c(HCT116_AID_RO_Ki67 = "Ki67",
HCT116_AID_RO_LMNB1 = "LMNB1",
HCT116_AID_RO_IAA_LMNB1 = "LMNB1"),
smooth = 9, plot_chr = "chr3", fix_yaxis = T,
#plot_start = 25e6, plot_end = 100e6,
color_list = colors_set1[c(1:2)])## Warning: Removed 116 rows containing missing values (geom_rect).
for (chr in chromosomes) {
# doublethy
PlotDataTracksLight(input_tib = tib,
exp_names = c("HCT116_AID_doublethy_Ki67",
"HCT116_AID_doublethy_LMNB1",
"doublethy_LMNB1_diff",
"doublethy_H3K27me3_diff",
"doublethy_H3K9me3_diff"),
centromeres,
color_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
HCT116_AID_doublethy_LMNB1 = "LMNB1",
doublethy_LMNB1_diff = "LMNB1",
doublethy_H3K27me3_diff = "H3K27me3",
doublethy_H3K9me3_diff = "H3K9me3"),
smooth = 15, plot_chr = chr, fix_yaxis = F,
color_list = colors_set1[c(1:4)])
# RO
PlotDataTracksLight(input_tib = tib,
exp_names = c("HCT116_AID_RO_Ki67",
"HCT116_AID_RO_LMNB1",
"RO_LMNB1_diff",
"RO_H3K27me3_diff",
"RO_H3K9me3_diff"),
centromeres,
color_groups = c(HCT116_AID_RO_Ki67 = "Ki67",
HCT116_AID_RO_LMNB1 = "LMNB1",
RO_LMNB1_diff = "LMNB1",
RO_H3K27me3_diff = "H3K27me3",
RO_H3K9me3_diff = "H3K9me3"),
smooth = 15, plot_chr = chr, fix_yaxis = F,
color_list = colors_set1[c(1:4)])
}## Warning: Removed 1740 rows containing missing values (geom_rect).
## Warning: Removed 1739 rows containing missing values (geom_rect).
## Warning: Removed 165 rows containing missing values (geom_rect).
## Warning: Removed 165 rows containing missing values (geom_rect).
## Warning: Removed 145 rows containing missing values (geom_rect).
## Warning: Removed 145 rows containing missing values (geom_rect).
## Warning: Removed 145 rows containing missing values (geom_rect).
## Warning: Removed 145 rows containing missing values (geom_rect).
## Warning: Removed 184 rows containing missing values (geom_rect).
## Warning: Removed 170 rows containing missing values (geom_rect).
## Warning: Removed 105 rows containing missing values (geom_rect).
## Warning: Removed 105 rows containing missing values (geom_rect).
## Warning: Removed 205 rows containing missing values (geom_rect).
## Warning: Removed 205 rows containing missing values (geom_rect).
## Warning: Removed 109 rows containing missing values (geom_rect).
## Warning: Removed 105 rows containing missing values (geom_rect).
## Warning: Removed 1670 rows containing missing values (geom_rect).
## Warning: Removed 1675 rows containing missing values (geom_rect).
## Warning: Removed 145 rows containing missing values (geom_rect).
## Warning: Removed 145 rows containing missing values (geom_rect).
## Warning: Removed 292 rows containing missing values (geom_rect).
## Warning: Removed 255 rows containing missing values (geom_rect).
## Warning: Removed 185 rows containing missing values (geom_rect).
## Warning: Removed 184 rows containing missing values (geom_rect).
## Warning: Removed 1640 rows containing missing values (geom_rect).
## Warning: Removed 1634 rows containing missing values (geom_rect).
## Warning: Removed 1702 rows containing missing values (geom_rect).
## Warning: Removed 1679 rows containing missing values (geom_rect).
## Warning: Removed 1845 rows containing missing values (geom_rect).
## Warning: Removed 1830 rows containing missing values (geom_rect).
## Warning: Removed 930 rows containing missing values (geom_rect).
## Warning: Removed 915 rows containing missing values (geom_rect).
## Warning: Removed 320 rows containing missing values (geom_rect).
## Warning: Removed 312 rows containing missing values (geom_rect).
## Warning: Removed 479 rows containing missing values (geom_rect).
## Warning: Removed 479 rows containing missing values (geom_rect).
## Warning: Removed 200 rows containing missing values (geom_rect).
## Warning: Removed 189 rows containing missing values (geom_rect).
## Warning: Removed 145 rows containing missing values (geom_rect).
## Warning: Removed 143 rows containing missing values (geom_rect).
## Warning: Removed 608 rows containing missing values (geom_rect).
## Warning: Removed 610 rows containing missing values (geom_rect).
## Warning: Removed 1172 rows containing missing values (geom_rect).
## Warning: Removed 1177 rows containing missing values (geom_rect).
## Warning: Removed 250 rows containing missing values (geom_rect).
## Warning: Removed 245 rows containing missing values (geom_rect).
Can I reproduce the cell cycle effects for Ki67 interactions?
# Plot chromosomal differences
tib %>%
group_by(seqnames) %>%
dplyr::summarise(Ki67 = mean(RO_vs_doublethy_Ki67, na.rm = T)) %>%
ungroup() %>%
mutate(seqnames = factor(seqnames, levels = chrom_sizes$seqnames)) %>%
ggplot(aes(x = seqnames, y = Ki67)) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_point() +
ylab("Difference G2 versus S-phase") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))# How about for all marks?
tib %>%
group_by(seqnames) %>%
dplyr::summarise(Ki67 = mean(RO_vs_doublethy_Ki67, na.rm = T),
LMNB1 = mean(RO_vs_doublethy_LMNB1, na.rm = T),
H3K27me3 = mean(RO_vs_doublethy_H3K27me3, na.rm = T),
H3K9me3 = mean(RO_vs_doublethy_H3K9me3, na.rm = T)) %>%
ungroup() %>%
gather(target, value, -seqnames) %>%
mutate(seqnames = factor(seqnames, chrom_sizes$seqnames),
target = factor(target, levels(padamid_metadata_combined$target))) %>%
ggplot(aes(x = seqnames, y = value, col = target)) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_point() +
ylab("Difference G2 versus S-phase") +
scale_color_manual(values = colors_set1) +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))Yes. That’s good.
So, can I show that auxin depletion results in perturbed lamina interactions in regions that are high in initial Ki67 binding?
Some issues: * Data is quite noisy, in particular Ki67 tracks * Small chromosomes are high in Ki67, but seem to stay interior after Ki67 depletion as well
# Plot Ki67 score versus the differences?
PlotScatterBinned(tib, n1 = "HCT116_AID_doublethy_Ki67", n2 = "doublethy_LMNB1_diff",
smooth = 1, count_range = c(0, 1000))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, n1 = "HCT116_AID_doublethy_Ki67", n2 = "doublethy_H3K27me3_diff",
smooth = 1, count_range = c(0, 1000))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, n1 = "HCT116_AID_doublethy_Ki67", n2 = "doublethy_H3K9me3_diff",
smooth = 1, count_range = c(0, 1000))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, n1 = "HCT116_AID_RO_Ki67", n2 = "RO_LMNB1_diff",
smooth = 1, count_range = c(0, 1000))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, n1 = "HCT116_AID_RO_Ki67", n2 = "RO_H3K27me3_diff",
smooth = 1, count_range = c(0, 1000))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, n1 = "HCT116_AID_RO_Ki67", n2 = "RO_H3K9me3_diff",
smooth = 1, count_range = c(0, 1000))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
# This seems to noisy
# Plot LaminB1 scores, highlight initial Ki67
PlotScatterBinned(tib, smooth = 1,
n1 = "HCT116_AID_doublethy_LMNB1",
n2 = "HCT116_AID_doublethy_IAA_LMNB1",
color_by = tib$HCT116_AID_doublethy_Ki67, ylimits_col = c(-2.5, 2.5))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 1,
n1 = "HCT116_AID_doublethy_H3K27me3",
n2 = "HCT116_AID_doublethy_IAA_H3K27me3",
color_by = tib$HCT116_AID_doublethy_Ki67, ylimits_col = c(-2.5, 2.5))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 1,
n1 = "HCT116_AID_doublethy_H3K9me3",
n2 = "HCT116_AID_doublethy_IAA_H3K9me3",
color_by = tib$HCT116_AID_doublethy_Ki67, ylimits_col = c(-2.5, 2.5))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 1,
n1 = "HCT116_AID_RO_LMNB1",
n2 = "HCT116_AID_RO_IAA_LMNB1",
color_by = tib$HCT116_AID_RO_Ki67, ylimits_col = c(-2.5, 2.5))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 1,
n1 = "HCT116_AID_RO_H3K27me3",
n2 = "HCT116_AID_RO_IAA_H3K27me3",
color_by = tib$HCT116_AID_RO_Ki67, ylimits_col = c(-2.5, 2.5))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 1,
n1 = "HCT116_AID_RO_H3K9me3",
n2 = "HCT116_AID_RO_IAA_H3K9me3",
color_by = tib$HCT116_AID_RO_Ki67, ylimits_col = c(-2.5, 2.5))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
# Plot LaminB1 scores, highlight initial Ki67 - 3 bins smoothing?
PlotScatterBinned(tib, smooth = 3,
n1 = "HCT116_AID_doublethy_LMNB1",
n2 = "HCT116_AID_doublethy_IAA_LMNB1",
color_by = tib$HCT116_AID_doublethy_Ki67, ylimits_col = c(-2.8, 2.8))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 3,
n1 = "HCT116_AID_doublethy_H3K27me3",
n2 = "HCT116_AID_doublethy_IAA_H3K27me3",
color_by = tib$HCT116_AID_doublethy_Ki67, ylimits_col = c(-2.8, 2.8))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 3,
n1 = "HCT116_AID_doublethy_H3K9me3",
n2 = "HCT116_AID_doublethy_IAA_H3K9me3",
color_by = tib$HCT116_AID_doublethy_Ki67, ylimits_col = c(-2.8, 2.8))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 3,
n1 = "HCT116_AID_RO_LMNB1",
n2 = "HCT116_AID_RO_IAA_LMNB1",
color_by = tib$HCT116_AID_RO_Ki67, ylimits_col = c(-2.8, 2.8))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 3,
n1 = "HCT116_AID_RO_H3K27me3",
n2 = "HCT116_AID_RO_IAA_H3K27me3",
color_by = tib$HCT116_AID_RO_Ki67, ylimits_col = c(-2.8, 2.8))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib, smooth = 3,
n1 = "HCT116_AID_RO_H3K9me3",
n2 = "HCT116_AID_RO_IAA_H3K9me3",
color_by = tib$HCT116_AID_RO_Ki67, ylimits_col = c(-2.8, 2.8))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
# Looks better
# Something else: correlation per chromosome with the difference
tib_cor <- tib %>%
group_by(seqnames) %>%
drop_na() %>%
dplyr::summarise(doublethy_LMNB1 = cor(HCT116_AID_doublethy_Ki67,
doublethy_LMNB1_diff),
doublethy_H3K27me3 = cor(HCT116_AID_doublethy_Ki67,
doublethy_H3K27me3_diff),
doublethy_H3K9me3 = cor(HCT116_AID_doublethy_Ki67,
doublethy_H3K9me3_diff),
RO_LMNB1 = cor(HCT116_AID_RO_Ki67,
RO_LMNB1_diff),
RO_H3K27me3 = cor(HCT116_AID_RO_Ki67,
RO_H3K27me3_diff),
RO_H3K9me3 = cor(HCT116_AID_RO_Ki67,
RO_H3K9me3_diff)) %>%
ungroup() %>%
gather(key, value, -seqnames) %>%
separate(key, c("condition", "target"), remove = F) %>%
mutate(condition = factor(condition, levels(padamid_metadata_combined$condition)),
target = factor(target, levels(padamid_metadata_combined$target)),
seqnames = factor(seqnames, chrom_sizes$seqnames))
tib_cor %>%
ggplot(aes(x = seqnames, y = value, col = target)) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_point(size = 3) +
facet_grid(. ~ condition) +
ylab("Pearson correlation between Ki67 and auxin-induced difference") +
scale_color_manual(values = colors_set1[2:4]) +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))# Mweh. Difficult to interpret.
# How about boxplots per Ki67 deciles?
tib_decile_doublethy <- tib %>%
drop_na() %>%
mutate(ki67_decile = case_when(HCT116_AID_doublethy_Ki67 > 1 ~ "high",
HCT116_AID_doublethy_Ki67 < -1 ~ "low",
T ~ "medium"),
ki67_decile = factor(ki67_decile, c("low", "medium", "high"))) %>%
dplyr::select(seqnames, ki67_decile, starts_with("doublethy")) %>%
gather(key, value, -seqnames, -ki67_decile) %>%
mutate(key = str_remove(key, "_diff"),
key = str_remove(key, ".*_"),
key = factor(key, levels(padamid_metadata_combined$target)))
tib_decile_doublethy %>%
ggplot(aes(x = ki67_decile, y = value, fill = ki67_decile)) +
geom_boxplot(outlier.shape = NA) +
coord_cartesian(ylim = c(-1.3, 1.3)) +
facet_grid(. ~ key) +
theme_bw() +
theme(aspect.ratio = 1)tib_decile_RO <- tib %>%
drop_na() %>%
arrange(HCT116_AID_RO_Ki67) %>%
mutate(ki67_decile = case_when(HCT116_AID_RO_Ki67 > 1 ~ "high",
HCT116_AID_RO_Ki67 < -1 ~ "low",
T ~ "medium"),
ki67_decile = factor(ki67_decile, c("low", "medium", "high"))) %>%
dplyr::select(seqnames, ki67_decile, starts_with("RO") & ends_with("diff")) %>%
gather(key, value, -seqnames, -ki67_decile) %>%
mutate(key = str_remove(key, "_diff"),
key = str_remove(key, ".*_"),
key = factor(key, levels(padamid_metadata_combined$target)))
tib_decile_RO %>%
ggplot(aes(x = ki67_decile, y = value, fill = ki67_decile)) +
geom_boxplot(outlier.shape = NA) +
coord_cartesian(ylim = c(-1.3, 1.3)) +
facet_grid(. ~ key) +
theme_bw() +
theme(aspect.ratio = 1)# Mweh. Not clear.
# Something different even - correlations not by chromosome
tib_cor <- tib %>%
# Smoothing of 3 bins?
mutate_at(vars(contains("vs"), contains("diff")),
function(x) runmean(x, k = 3)) %>%
drop_na() %>%
dplyr::summarise(doublethy_LMNB1 = cor(HCT116_AID_doublethy_Ki67,
doublethy_LMNB1_diff),
doublethy_H3K27me3 = cor(HCT116_AID_doublethy_Ki67,
doublethy_H3K27me3_diff),
doublethy_H3K9me3 = cor(HCT116_AID_doublethy_Ki67,
doublethy_H3K9me3_diff),
RO_LMNB1 = cor(HCT116_AID_RO_Ki67,
RO_LMNB1_diff),
RO_H3K27me3 = cor(HCT116_AID_RO_Ki67,
RO_H3K27me3_diff),
RO_H3K9me3 = cor(HCT116_AID_RO_Ki67,
RO_H3K9me3_diff)) %>%
gather(key, value) %>%
separate(key, c("condition", "target"), remove = F) %>%
mutate(condition = factor(condition, levels(padamid_metadata_combined$condition)),
target = factor(target, levels(padamid_metadata_combined$target)))
tib_cor %>%
ggplot(aes(x = condition, y = value, col = target)) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_point(size = 3) +
xlab("") +
ylab("Pearson correlation between Ki67 and auxin-induced difference") +
scale_color_manual(values = colors_set1[2:4]) +
theme_bw() +
theme(aspect.ratio = 1.5,
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))tib_cor %>%
print(n = 20)## # A tibble: 6 Ă— 4
## key condition target value
## <chr> <fct> <fct> <dbl>
## 1 doublethy_LMNB1 doublethy LMNB1 0.291
## 2 doublethy_H3K27me3 doublethy H3K27me3 0.246
## 3 doublethy_H3K9me3 doublethy H3K9me3 -0.0349
## 4 RO_LMNB1 RO LMNB1 0.205
## 5 RO_H3K27me3 RO H3K27me3 0.127
## 6 RO_H3K9me3 RO H3K9me3 0.124
# Easy to explain, easy to interpret, show only a fraction of the data and the
# entire story
#
# Let's go for it
# Include distance to centromere / smoothing of Ki-67 differences
tib_tmp <- tib %>%
mutate(distance_to_centromere = mcols(distanceToNearest(as(tib, "GRanges"),
centromeres))$distance,
distance_to_centromere = distance_to_centromere / 1e6) %>%
mutate_at(vars(matches("HCT116|diff")), function(x) runmean(x, k = 9))
# Scatterplot
PlotScatterBinned(tib_tmp, smooth = 3,
n1 = "HCT116_AID_doublethy_Ki67",
n2 = "doublethy_LMNB1_diff",
color_by = log10(tib_tmp$distance_to_centromere+1), ylimits_col = c(-5, 5))## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
# All scatterplots
tib_cor <- tibble()
conditions <- c("doublethy", "RO")
targets <- c("LMNB1", "H3K27me3", "H3K9me3")
for (condition in conditions) {
for (target in targets) {
# Get the data
tib_tmp <- tib_padamid_combined %>%
dplyr::select(1:3,
matches("AID") &
matches(condition) &
matches(target)) %>%
rename_at(4:5, ~ c("DMSO", "IAA")) %>%
drop_na()
# Scatterplot
#PlotScatterBinned(tib_tmp, "DMSO", "IAA", count_range = c(0, 750))
# Determine pearson correlation
cor_tmp <- cor(tib_tmp$DMSO, tib_tmp$IAA)
# Add this to tib_cor object
tib_cor <- bind_rows(tib_cor,
tibble(condition = condition,
target = target,
cor = cor_tmp))
}
}
# Add factors
tib_cor <- tib_cor %>%
mutate(condition = factor(condition, levels = conditions),
target = factor(target, levels = targets))
# Combined plot
tib_cor %>%
ggplot(aes(x = target, y = cor, col = condition)) +
geom_point(size = 3) +
xlab("") +
ylab("Pearson correlation") +
scale_color_manual(values = colors_set3) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))# Add distance to centromeres
dis <- distanceToNearest(as(tib, "GRanges"), centromeres)
tib$distance_to_centromere <- mcols(dis)$distance / 1e6
# Summary per chromosome
tib_summary <- tib %>%
filter(distance_to_centromere < 2) %>%
drop_na() %>%
gather(key, value, contains("diff")) %>%
group_by(seqnames, key) %>%
summarise(mean = mean(value)) %>%
ungroup() %>%
separate(key, c("condition", "target"), remove = F) %>%
mutate(condition = factor(condition, c("doublethy", "RO")),
target = factor(target, levels(padamid_metadata_combined$target)),
seqnames = factor(seqnames, chrom_sizes$seqnames),
rdna = seqnames %in% paste0("chr", c(13, 14, 15, 21, 22))) %>%
mutate(size = chrom_sizes$length[match(seqnames,
chrom_sizes$seqnames)],
size = size / 1e6)## `summarise()` has grouped output by 'seqnames'. You can override using the `.groups` argument.
## Warning: Expected 2 pieces. Additional pieces discarded in 138 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# Plot
tib_summary %>%
ggplot(aes(x = seqnames, y = mean, col = target, shape = rdna)) +
geom_point(size = 2) +
geom_hline(yintercept = 0, col = "black") +
facet_grid(. ~ condition) +
xlab("") +
ylab("Ki67 score near centromeres") +
scale_color_manual(values = colors_set1[2:4]) +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))Is this due to DNA accessibility changes?
# Load Dam counts, for some samples
# List files
count_files <- dir("results/tracks/counts/bin-50kb/", pattern = "Dam",
recursive = T, full.names = T)
count_files <- grep("AID", count_files, value = T)
count_files <- grep("doublethy|RO", count_files, value = T)
# Prepare into metadata
count_metadata <- tibble(file = count_files) %>%
mutate(sample = basename(str_remove(file, "\\..*")),
sample = str_remove(sample, "_Dam.*"),
sample = str_remove(sample, ".*AID_")) %>%
mutate(condition = case_when(str_detect(sample, "double") ~ "doublethy",
T ~ "RO"),
treatment = case_when(str_detect(sample, "IAA") ~ "plusIAA",
T ~ "minusIAA"),
replicate = str_remove(sample, ".*_"),
class = paste(condition, treatment, sep = "_"))
# Load count bigwig files - combine into one tibble
tmp <- tibble(seqnames = character(),
start = numeric(),
end = numeric())
for (i in 1:nrow(count_metadata)) {
f_name <- count_metadata$sample[i]
f_tib <- as_tibble(import(count_metadata$file[i])) %>%
dplyr::select(-width, -strand) %>%
rename(score = f_name) %>%
mutate(seqnames = as.character(seqnames))
tmp <- full_join(tmp, f_tib)
}## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
## Joining, by = c("seqnames", "start", "end")
# Mean of replicates and bigger bins
ovl <- findOverlaps(as(tmp, "GRanges"), as(tib, "GRanges"),
select = "arbitrary")
tmp <- tmp %>%
add_column(bin = ovl) %>%
gather(key, value, count_metadata$sample) %>%
mutate(class = count_metadata$class[match(key, count_metadata$sample)]) %>%
group_by(bin, class) %>%
dplyr::summarise(mean = mean(value, na.rm = T)) %>%
spread(class, mean) %>%
arrange(bin) %>%
drop_na(bin)## `summarise()` has grouped output by 'bin'. You can override using the `.groups` argument.
# Prepare count GRanges
gr_count <- as(tib, "GRanges")
mcols(gr_count) <- NULL
mcols(gr_count)[, names(tmp)[2:ncol(tmp)]] <- NA
mcols(gr_count)[tmp$bin, names(tmp)[2:ncol(tmp)]] <- tmp[, 2:ncol(tmp)]
# Prepare count tibble & determine differences
tib_count <- as_tibble(gr_count) %>%
dplyr::select(-strand, -width) %>%
mutate(doublethy_diff = log2((doublethy_plusIAA+1) / (doublethy_minusIAA+1)),
RO_diff = log2((RO_plusIAA+1) / (RO_minusIAA+1)))
# Repeat centromere exercise
# Add distance to centromeres
dis <- distanceToNearest(as(tib_count, "GRanges"), centromeres)
tib_count$distance_to_centromere <- mcols(dis)$distance / 1e6
# Summary per chromosome
tib_summary <- tib_count %>%
filter(distance_to_centromere < 2) %>%
drop_na() %>%
gather(key, value, contains("diff")) %>%
group_by(seqnames, key) %>%
summarise(mean = mean(value)) %>%
ungroup() %>%
separate(key, c("condition"), remove = F) %>%
mutate(condition = factor(condition, c("doublethy", "RO")),
seqnames = factor(seqnames, chrom_sizes$seqnames),
rdna = seqnames %in% paste0("chr", c(13, 14, 15, 21, 22))) %>%
mutate(size = chrom_sizes$length[match(seqnames,
chrom_sizes$seqnames)],
size = size / 1e6)## `summarise()` has grouped output by 'seqnames'. You can override using the `.groups` argument.
## Warning: Expected 1 pieces. Additional pieces discarded in 46 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# Plot
tib_summary %>%
ggplot(aes(x = seqnames, y = mean, col = condition, shape = rdna)) +
geom_point(size = 2) +
geom_hline(yintercept = 0, col = "black") +
xlab("") +
ylab("Dam only differences near centromere") +
scale_color_manual(values = colors_set3) +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))# Alternative, correlate Ki67 versus the change in Dam signal
tib_combined <- full_join(tib, tib_count)## Joining, by = c("seqnames", "start", "end", "distance_to_centromere")
PlotScatterBinned(tib_combined, "HCT116_AID_doublethy_Ki67", "doublethy_diff")## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib_combined, "HCT116_AID_RO_Ki67", "RO_diff")## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib_combined, "doublethy_minusIAA", "doublethy_plusIAA",
color_by = tib_combined$HCT116_AID_doublethy_Ki67,
remove_outliers = T)## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
PlotScatterBinned(tib_combined, "RO_minusIAA", "RO_plusIAA",
color_by = tib_combined$HCT116_AID_RO_Ki67,
remove_outliers = T)## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.
Yes, Ki67 depletion results in increased LaminB1 interactions for regions initially high in Ki67. The differences are quite small, which means that this analysis is strongly affected by noisiness. I decided not to smooth anything.
I believe that the observations that I will present are legit, but might only tell a fraction of the complete story. Let’s go for it, though.
sessionInfo()## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] knitr_1.33 caTools_1.18.2 corrr_0.4.3
## [4] GGally_2.1.2 RColorBrewer_1.1-2 rtracklayer_1.50.0
## [7] GenomicRanges_1.42.0 GenomeInfoDb_1.26.7 IRanges_2.24.1
## [10] S4Vectors_0.28.1 BiocGenerics_0.36.1 forcats_0.5.1
## [13] stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
## [16] readr_2.0.0 tidyr_1.1.3 tibble_3.1.6
## [19] ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 matrixStats_0.60.0
## [3] fs_1.5.0 lubridate_1.7.10
## [5] httr_1.4.2 tools_4.0.5
## [7] backports_1.2.1 bslib_0.2.5.1
## [9] utf8_1.2.2 R6_2.5.1
## [11] DBI_1.1.1 colorspace_2.0-2
## [13] withr_2.4.2 tidyselect_1.1.1
## [15] compiler_4.0.5 cli_3.1.0
## [17] rvest_1.0.1 Biobase_2.50.0
## [19] xml2_1.3.2 DelayedArray_0.16.3
## [21] labeling_0.4.2 sass_0.4.0
## [23] scales_1.1.1 digest_0.6.28
## [25] Rsamtools_2.6.0 rmarkdown_2.11
## [27] XVector_0.30.0 pkgconfig_2.0.3
## [29] htmltools_0.5.1.1 MatrixGenerics_1.2.1
## [31] highr_0.9 dbplyr_2.1.1
## [33] rlang_0.4.12 readxl_1.3.1
## [35] rstudioapi_0.13 farver_2.1.0
## [37] jquerylib_0.1.4 generics_0.1.0
## [39] jsonlite_1.7.2 BiocParallel_1.24.1
## [41] RCurl_1.98-1.3 magrittr_2.0.1
## [43] GenomeInfoDbData_1.2.4 Matrix_1.3-4
## [45] Rcpp_1.0.7 munsell_0.5.0
## [47] fansi_0.5.0 lifecycle_1.0.1
## [49] stringi_1.7.3 yaml_2.2.1
## [51] SummarizedExperiment_1.20.0 zlibbioc_1.36.0
## [53] plyr_1.8.6 grid_4.0.5
## [55] crayon_1.4.2 lattice_0.20-44
## [57] Biostrings_2.58.0 haven_2.4.1
## [59] hms_1.1.0 pillar_1.6.4
## [61] codetools_0.2-18 reprex_2.0.0
## [63] XML_3.99-0.6 glue_1.5.0
## [65] evaluate_0.14 modelr_0.1.8
## [67] vctrs_0.3.8 tzdb_0.1.2
## [69] cellranger_1.1.0 gtable_0.3.0
## [71] reshape_0.8.8 assertthat_0.2.1
## [73] xfun_0.24 broom_0.7.9
## [75] GenomicAlignments_1.26.0 ellipsis_0.3.2